options(scipen = 999)
options(digits = 1)

brazil_bins <-
  brazil_data %>%
  filter(urb == 1) %>%
  mutate(pop_logged = log10(pop)) %>%
  mutate(bins = cut(pop_logged, breaks = seq(from = min(pop_logged), to = max(pop_logged), by = (max(pop_logged) - min(pop_logged))/20), dig.lab = 6)) %>%
  group_by(bins) %>%
  mutate(n_obs = n()) %>% 
  mutate(prop_comm_health_avg =  mean(prop_comm_health, na.rm = T),
         prop_doctors_avg = mean(prop_doctos, na.rm = T),
         schools_prop_avg = mean(schools_prop, na.rm = T)) %>%
  dplyr::select(n_obs, prop_comm_health_avg, prop_doctors_avg, schools_prop_avg) %>%
  distinct() %>%
  filter(!is.na(bins)) %>%
  pivot_longer(cols = prop_comm_health_avg:schools_prop_avg, names_to = "names", values_to = "values") %>%
  mutate(labels = case_when(names == "prop_comm_health_avg" ~ "Health Centers per 1000 Residents",
                            names == "prop_doctors_avg" ~ "Doctors per 1000 Residents",
                            names == "schools_prop_avg" ~ "Schools per 1000 Residents",
                            TRUE ~ NA_character_
  )) %>%
  mutate(bins_split = bins,
         bins_split = str_replace_all(bins_split, "\\(|\\)|\\[|\\]", "")) %>%
  separate(bins_split, into = c("start_bin", "end_bin"), sep = ",") %>%
  mutate_at(vars(start_bin, end_bin), list(~as.numeric(.))) %>%
  mutate(bin_position = (start_bin + end_bin)/2) %>%
  ungroup() %>%
  dplyr::select(bin_position, labels, values, n_obs)

brazil_plot <-
  brazil_data %>%
  filter(urb == 1) %>%
  dplyr::select(pop, prop_comm_health, prop_doctos, schools_prop) %>%
  pivot_longer(cols = prop_comm_health:schools_prop, names_to = "names", values_to = "values") %>%
  mutate(labels = case_when(names == "prop_comm_health" ~ "Health Centers per 1000 Residents",
                            names == "prop_doctos" ~ "Doctors per 1000 Residents",
                            names == "schools_prop" ~ "Schools per 1000 Residents",
                            TRUE ~ NA_character_
  )) %>%
  filter(names != "prop_doctos") %>%
  ggplot(aes(x=log10(pop), y = values)) +
  #geom_point(shape = 21, fill = "lightgrey", alpha = 0.1) +
  facet_wrap(labels~., scales = "free") +
  geom_smooth(method = "loess", color = "black") +
  geom_point(data = brazil_bins %>% filter(labels != "Doctors per 1000 Residents"), aes(x=bin_position, y = values, size = n_obs), shape = 21, alpha = 0.7, fill = "lightgrey") +
  stat_summary_bin(fun='mean', bins=20,
                   shape = 21, fill = "lightgrey",
                   alpha = 0,
                   color='black', size=1.5, geom='point') +
  theme_bw() +
  scale_x_continuous(limits = c(3, 7.25), 
                     breaks = c(3, 4, 5, 6, 7),
                     labels = c("3\n(1,000)", "4\n(10,000)", "5\n(100,000)", "6\n(1,000,000)", "7\n(10,000,000)")) +
  scale_colour_grey() +
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major.x = element_blank(),
        axis.line.y.left = element_blank(),
        legend.position = "right",
        
        strip.background = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_blank(),
        axis.title.y = element_blank()) +
  labs(size='Observations', size = 4)  +
  xlab("Population, log scale")


ggsave("./_4_outputs/figure_1.pdf", plot = brazil_plot, width = 10, height = 4)
